Data Relations

Author

Shifa Maqsood

Create data

customers <- tibble(
  id = 1:5,
  city = c("Port Ellen", "Dufftown", NA, "Aberlour", "Tobermory"),
  postcode = c("PA42 7DU", "AB55 4DH", NA, "AB38 7RY", "PA75 6NR")
)
orders <- tibble(
  id = c(2, 3, 4, 4, 5, 5, 6, 6, 7),
  items = c(10, 18, 21, 23, 9, 11, 11, 12, 3)
)

Mutating Joins

Left join

left_data <- left_join(customers, orders, by = "id")
Warning in left_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
left_data
# A tibble: 7 × 4
     id city       postcode items
  <dbl> <chr>      <chr>    <dbl>
1     1 Port Ellen PA42 7DU    NA
2     2 Dufftown   AB55 4DH    10
3     3 <NA>       <NA>        18
4     4 Aberlour   AB38 7RY    21
5     4 Aberlour   AB38 7RY    23
6     5 Tobermory  PA75 6NR     9
7     5 Tobermory  PA75 6NR    11
left2_data <- left_join(orders, customers, by = "id")
left2_data
# A tibble: 9 × 4
     id items city      postcode
  <dbl> <dbl> <chr>     <chr>   
1     2    10 Dufftown  AB55 4DH
2     3    18 <NA>      <NA>    
3     4    21 Aberlour  AB38 7RY
4     4    23 Aberlour  AB38 7RY
5     5     9 Tobermory PA75 6NR
6     5    11 Tobermory PA75 6NR
7     6    11 <NA>      <NA>    
8     6    12 <NA>      <NA>    
9     7     3 <NA>      <NA>    

right join

right_data <- right_join(customers, orders, by = "id")
Warning in right_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
right_data
# A tibble: 9 × 4
     id city      postcode items
  <dbl> <chr>     <chr>    <dbl>
1     2 Dufftown  AB55 4DH    10
2     3 <NA>      <NA>        18
3     4 Aberlour  AB38 7RY    21
4     4 Aberlour  AB38 7RY    23
5     5 Tobermory PA75 6NR     9
6     5 Tobermory PA75 6NR    11
7     6 <NA>      <NA>        11
8     6 <NA>      <NA>        12
9     7 <NA>      <NA>         3

inner join

inner_data <- inner_join(customers, orders, by = "id")
Warning in inner_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
inner_data
# A tibble: 6 × 4
     id city      postcode items
  <dbl> <chr>     <chr>    <dbl>
1     2 Dufftown  AB55 4DH    10
2     3 <NA>      <NA>        18
3     4 Aberlour  AB38 7RY    21
4     4 Aberlour  AB38 7RY    23
5     5 Tobermory PA75 6NR     9
6     5 Tobermory PA75 6NR    11

full join

full_data <- full_join(customers, orders, by = "id")
Warning in full_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
full_data
# A tibble: 10 × 4
      id city       postcode items
   <dbl> <chr>      <chr>    <dbl>
 1     1 Port Ellen PA42 7DU    NA
 2     2 Dufftown   AB55 4DH    10
 3     3 <NA>       <NA>        18
 4     4 Aberlour   AB38 7RY    21
 5     4 Aberlour   AB38 7RY    23
 6     5 Tobermory  PA75 6NR     9
 7     5 Tobermory  PA75 6NR    11
 8     6 <NA>       <NA>        11
 9     6 <NA>       <NA>        12
10     7 <NA>       <NA>         3

Filtering Joins

semi join

#they can keep or remove rows from table

semi_data <- semi_join(customers, orders, by = "id")
semi_data
# A tibble: 4 × 3
     id city      postcode
  <int> <chr>     <chr>   
1     2 Dufftown  AB55 4DH
2     3 <NA>      <NA>    
3     4 Aberlour  AB38 7RY
4     5 Tobermory PA75 6NR
semi2_data <- semi_join(orders, customers, by = "id")
semi2_data
# A tibble: 6 × 2
     id items
  <dbl> <dbl>
1     2    10
2     3    18
3     4    21
4     4    23
5     5     9
6     5    11

anti join

anti_data <- anti_join(customers, orders, by = "id")
anti_data
# A tibble: 1 × 3
     id city       postcode
  <int> <chr>      <chr>   
1     1 Port Ellen PA42 7DU
anti2_data <- anti_join(orders, customers, by = "id")
anti2_data
# A tibble: 3 × 2
     id items
  <dbl> <dbl>
1     6    11
2     6    12
3     7     3

Mutliple joins

satisfaction <- tibble(
  id = 1:5,
  satisfaction = c(4, 3, 2, 3, 1)
)


join_1 <- left_join(customers, orders, by = "id")
Warning in left_join(customers, orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.
join_2 <- left_join(join_1, satisfaction, 
                    by = "id")




pipe_join <- customers %>%
  left_join(orders, by = "id") %>%
  left_join(satisfaction, by = "id")
Warning in left_join(., orders, by = "id"): Each row in `x` is expected to match at most 1 row in `y`.
ℹ Row 4 of `x` matches multiple rows.
ℹ If multiple matches are expected, set `multiple = "all"` to silence this
  warning.

Binding joins

new_customers <- tibble(
  id = 6:9,
  city = c("Falkirk", "Ardbeg", "Doogal", "Kirkwall"),
  postcode = c("FK1 4RS", "PA42 7EA", "G81 4SJ", "KW15 1SE")
)

bindr_data <- bind_rows(customers, new_customers)
bindr_data
# A tibble: 9 × 3
     id city       postcode
  <int> <chr>      <chr>   
1     1 Port Ellen PA42 7DU
2     2 Dufftown   AB55 4DH
3     3 <NA>       <NA>    
4     4 Aberlour   AB38 7RY
5     5 Tobermory  PA75 6NR
6     6 Falkirk    FK1 4RS 
7     7 Ardbeg     PA42 7EA
8     8 Doogal     G81 4SJ 
9     9 Kirkwall   KW15 1SE